library(tidyverse)
library(tigris)
library(censusapi)
library(sf)
library(mapview)
library(plotly)
options(
tigris_class = "sf",
tigris_use_cache = T # This stores tigris loads somewhere on your machine for much faster personal loading.
)
This script uses an output from load_safegraph.Rmd which processes down the daily social distancing dataset from Safegraph to just block groups in the Bay Area.
Loading geometries
These were the high-level lists of Bay county names, county shapes, and block group IDs from the other script, in case they become useful again.
bay_county_names <-
c(
"Alameda",
"Contra Costa",
"Marin",
"Napa",
"San Francisco",
"San Mateo",
"Santa Clara",
"Solano",
"Sonoma"
)
bay_blockgroups <-
bay_county_names %>%
map(function(x){
block_groups("CA",x,progress_bar=F) %>%
pull(GEOID)
}) %>% unlist()
bay_counties <-
counties("CA", cb = F, progress_bar=F) %>%
filter(NAME %in% bay_county_names)
The following returns all of the block groups in North Fair Oaks.
smc_blockgroups <-
block_groups("CA","San Mateo", cb=F, progress_bar=F)
nfo_boundary <-
places("CA", cb=F, progress_bar=F) %>%
filter(NAME == "North Fair Oaks")
nfo_blockgroups <-
smc_blockgroups %>%
dplyr::select(GEOID) %>%
st_join(nfo_boundary %>% dplyr::select(geometry), left = F) #%>%
#st_set_geometry(NULL) %>%
#left_join(smc_blockgroups %>% dplyr::select(GEOID)) %>%
#st_as_sf()
mapview(nfo_blockgroups)+mapview(nfo_boundary,alpha.region= 0, color = "red", lwd = 4)
There is a block group that spreads into the baylands and doesn’t contain a significant residential area, so like in San Jose, I remove this particular block.
nfo_blockgroups <-
nfo_blockgroups %>%
filter(!GEOID %in% c("060816117004"))
mapview(nfo_blockgroups)
Loading social distancing data
bay_socialdistancing <-
readRDS("/Users/juliawagenfehr/F/Restricted Data Library/Safegraph/covid19analysis/bay_socialdistancing.rds")
nfo_socialdistancing <-
bay_socialdistancing %>%
filter(origin_census_block_group %in% nfo_blockgroups$GEOID)
Analyzing social distance data
# helps to quickly check if fields have missing data before you do math.
sum(is.na(nfo_socialdistancing$completely_home_device_count))
## [1] 0
sum(is.na(nfo_socialdistancing$device_count))
## [1] 0
nfo_percenthome_bg <-
nfo_socialdistancing %>%
mutate(
`% Completely at Home` = completely_home_device_count/device_count,
date = date_range_start %>% substr(1,10) %>% as.Date()
) %>%
dplyr::select(origin_census_block_group,date,completely_home_device_count,device_count,`% Completely at Home`)
nfo_percenthome_time <-
nfo_percenthome_bg %>%
group_by(date) %>%
summarize(
completely_home_device_count = sum(completely_home_device_count),
device_count = sum(device_count)
) %>%
mutate(`% Completely at Home` = (completely_home_device_count/device_count*100) %>% round(1))
chart <-
nfo_percenthome_time %>%
ggplot(
aes(
x = date,
y = `% Completely at Home`
)
) +
geom_line() +
labs(
x = "Date",
y = "Percent staying completely at home",
title = "North Fair Oaks, CA"
)
chart %>% ggplotly()
We can add the non-pharmaceutical interventions (NPIs) in the graph. Here’s info about SMC’s shelter in place order, 3/16/20.
chart <-
chart +
geom_vline(
aes(
xintercept = "2020-03-16" %>% as.Date() %>% as.numeric() # ggplotly needs vlines to be numeric
),
linetype="dotted"
) +
annotate("text",label= "San Mateo County\nShelter-in-Place Order", color = "black", x=(("2020-03-16" %>% as.Date())-6), y=55, size=2)
chart %>% ggplotly()
nfo_percenthome_map <-
nfo_percenthome_bg %>%
filter(date > max(date)-3) %>%
group_by(origin_census_block_group) %>%
summarize(`% Completely at Home` = round(mean(`% Completely at Home`)*100,1)) %>%
left_join(nfo_blockgroups, by = c("origin_census_block_group" = "GEOID")) %>%
st_as_sf()
mapview(nfo_percenthome_map, zcol = "% Completely at Home", layer.name = "% Completely<br>at Home,<br>Average 4/1-4/3")